package com.plm.mr.etl;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper that filters web-log lines by field count.
 *
 * <p>Every input line is split on a single space character. Lines that yield
 * more than {@link #FIELD_COUNT_THRESHOLD} fields are emitted unchanged as the
 * output key (paired with {@link NullWritable}); all other lines are dropped.
 */
public class WebLogMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /** A line is considered valid only when its field count is strictly greater than this value. */
    private static final int FIELD_COUNT_THRESHOLD = 11;

    /**
     * Emits the incoming line as-is when it passes {@link #parseLog}; otherwise emits nothing.
     *
     * @param key     input key supplied by the framework; not used here
     * @param value   one line of log text
     * @param context task context used to write the output record
     * @throws IOException          if writing the output record fails
     * @throws InterruptedException if writing the output record is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Convert the record to a String so it can be split into fields.
        String rawLine = value.toString();

        // Only valid lines are written; the original Text object is the output key.
        if (parseLog(rawLine, context)) {
            context.write(value, NullWritable.get());
        }
    }

    /**
     * Decides whether a log line is valid based on how many fields it contains.
     *
     * @param line    the log line to inspect
     * @param context task context; currently not used by this check
     * @return {@code true} if splitting {@code line} on {@code " "} produces more than
     *         {@link #FIELD_COUNT_THRESHOLD} fields, {@code false} otherwise
     */
    private boolean parseLog(String line, Context context) {
        int fieldCount = line.split(" ").length;
        return fieldCount > FIELD_COUNT_THRESHOLD;
    }
}
